All miRNA expression and sample information data are located here:
/mnt/FdmComune/Progetti LAB/Beta-cell ablation in mice/3) ANALISI/MARCO/2023-09-18 miRNA count matrix/output
Longitudinal analysis may be impaired by a strong batch processing effect. See 2023-09-15 miRNA profiling.pptx report.
A minimal low counts filtering was applied:
setwd("/mnt/FdmComune/Progetti LAB/Beta-cell ablation in mice/3) ANALISI/MARCO/2023-09-19 DE miRNAs")
library(tidyverse)
cts <- "../2023-09-05 miRNA count matrix/output/miRNA_wide.csv" %>%
read_csv2() %>%
column_to_rownames("name") %>%
as.matrix
# Experimental design
coldata <- "../_data/Samples.experiment.csv" %>%
read_csv2(col_types = "fffffcn", locale = locale(decimal_mark = ",")) %>%
mutate(
Hemolysis = Hemolysis %>%
factor(levels = c("No", "Little", "Moderate", "Significant", "Strong")),
Day = Day %>%
fct_relabel(~ paste("Day", .x %>% str_pad(2, "left"))),
Condition = Group %>%
fct_recode(BCA="β-cell ablation", IR="Insulin resistance") %>%
paste(Treatment) %>%
factor,
rowname = Sample
) %>%
group_by(Condition) %>%
group_modify(~ .x %>% mutate(Subject = Mouse %>% fct_drop %>% as.integer %>% factor)) %>%
arrange(Sample) %>%
column_to_rownames()
library(DESeq2)
library(ggrepel)
library(ggupset)
library(pheatmap)
library(EnhancedVolcano)
library(BiocParallel)
library(GGally)
library(viridis)
library(ggpointdensity)
library(VennDiagram)
library(ashr)
library(RColorBrewer)
register(SnowParam(12))
cts %>% dim
## [1] 737 72
cts %>% colSums
## 1 2 3 4 5 6 7 8 9 10
## 2202058 1502628 2075319 646126 1494617 882986 1914512 1839206 2495669 752481
## 11 12 13 14 15 16 17 18 19 20
## 1565867 1720161 2197689 1612119 1583410 2018246 1526532 1628952 2013051 2154675
## 21 22 23 24 25 26 27 28 29 30
## 1167474 1159896 1922718 2034482 908323 1039171 1528111 2160906 1880262 2480872
## 31 32 33 34 35 36 37 38 39 40
## 2363008 1141585 1643286 2539664 1588303 1771521 1052104 1641221 921475 2044758
## 41 42 43 44 45 46 47 48 49 50
## 1368366 2360312 2567789 2343702 2171048 819094 1292505 899127 2542832 2755294
## 51 52 53 54 55 56 57 58 59 60
## 2541600 4224741 2603335 3170330 2394511 1895967 2168836 2800867 2627901 2450157
## 61 62 63 64 65 66 67 68 69 70
## 4381055 4052769 2452721 2269985 1904901 2098311 990027 2650943 3457388 1847765
## 71 72
## 2476990 3893666
cts %>% colSums %>% barplot
# Low-counts filtering
cts <- cts[apply(cts, 1, max) >= 10, ]
cts %>% dim
## [1] 587 72
cts %>% colSums
## 1 2 3 4 5 6 7 8 9 10
## 2201848 1502472 2075127 646043 1494437 882864 1914344 1838990 2495403 752418
## 11 12 13 14 15 16 17 18 19 20
## 1565719 1720002 2197488 1611956 1583188 2018027 1526324 1628759 2012876 2154424
## 21 22 23 24 25 26 27 28 29 30
## 1167334 1159771 1922500 2034289 908200 1039044 1527966 2160681 1879977 2480588
## 31 32 33 34 35 36 37 38 39 40
## 2362757 1141477 1643076 2539420 1588089 1771328 1051936 1641006 921337 2044542
## 41 42 43 44 45 46 47 48 49 50
## 1368214 2360098 2567584 2343448 2170849 818977 1292339 899002 2542602 2754980
## 51 52 53 54 55 56 57 58 59 60
## 2541321 4224344 2603004 3170065 2394265 1895816 2168578 2800544 2627674 2449975
## 61 62 63 64 65 66 67 68 69 70
## 4380576 4052406 2452517 2269749 1904698 2098049 989904 2650675 3457022 1847581
## 71 72
## 2476754 3893238
cts %>% colSums %>% barplot
cts %>%
as.data.frame %>%
rownames_to_column("miRNA") %>%
write_excel_csv2("output/counts.raw.csv")
dds <- DESeqDataSetFromMatrix(
countData = cts,
colData = coldata,
design = ~ 0 + Condition * Day + Subject
) %>%
DESeq
plotDispEsts(dds)
rld <- dds %>% rlog(blind = FALSE)
rld %>% plotPCA(intgroup = "Group")
rld %>% plotPCA(intgroup = "Treatment")
rld %>% plotPCA(intgroup = "Day")
rld %>%
assay %>%
cor %>%
`^`(2) %>%
pheatmap(
annotation_col = coldata %>% select(Day:`RNA Concentration`),
show_rownames = F,
show_colnames = F,
main = "Replicates correlation"
)
rld <- rld %>%
assay() %>%
as.data.frame %>%
rownames_to_column("miRNA") %>%
pivot_longer(!miRNA, names_to = "Sample", values_to = "Reads") %>%
inner_join(coldata) %>%
group_by(Group, Treatment, Day, miRNA)
pointdensity <- function(data, mapping, ...) {
ggplot(data, mapping) +
geom_abline(color = "red") +
geom_pointdensity(...) +
scale_color_viridis("Density") +
theme_bw()
}
rld %>%
group_by(Group, miRNA) %>%
summarise(Reads = Reads %>% mean) %>%
pivot_wider(id_cols = miRNA, names_from = Group, values_from = Reads) %>%
select(!miRNA) %>%
ggpairs(lower = list(continuous = pointdensity))
ggsave("output/scatter.mean.Group.rlog.png", width = 16, height = 9)
rld %>%
group_by(Condition, miRNA) %>%
summarise(Reads = Reads %>% mean) %>%
pivot_wider(id_cols = miRNA, names_from = Condition, values_from = Reads) %>%
select(!miRNA) %>%
ggpairs(lower = list(continuous = pointdensity))
ggsave("output/scatter.mean.Condition.rlog.png", width = 16, height = 9)
rld %>%
group_by(Day, miRNA) %>%
summarise(Reads = Reads %>% mean) %>%
pivot_wider(id_cols = miRNA, names_from = Day, values_from = Reads) %>%
select(!miRNA) %>%
ggpairs(lower = list(continuous = pointdensity))
ggsave("output/scatter.mean.Day.rlog.png", width = 16, height = 9)
rld %>%
group_by(Mouse, miRNA) %>%
summarise(Reads = Reads %>% mean) %>%
pivot_wider(id_cols = miRNA, names_from = Mouse, values_from = Reads) %>%
select(!miRNA) %>%
ggpairs(lower = list(continuous = pointdensity)) %>%
ggsave(
filename = "output/scatter.mean.Mouse.rlog.png",
width = 32,
height = 18
)
rld %>%
summarise(Reads = Reads %>% mean) %>%
pivot_wider(names_from = Treatment, values_from = Reads) %>%
pivot_longer(!Group:Control, names_to = "Treatment", values_to = "Treated") %>%
ggplot() +
geom_pointdensity(aes(x = Control, y = Treated)) +
facet_grid(Day ~ Treatment) +
ggtitle("rlog normalized counts") +
scale_color_viridis("Density") +
theme_bw()
ggsave("output/scatter.control_treated.rlog.png", width = 16, height = 9)
rld %>%
summarise(Reads = Reads %>% mean) %>%
pivot_wider(names_from = Day, values_from = Reads) %>%
pivot_longer(!Group:`Day 0`, names_to = "Day", values_to = "Following days") %>%
ggplot() +
geom_pointdensity(aes(x = `Day 0`, y = `Following days`)) +
facet_grid(Day ~ Group + Treatment) +
ggtitle("rlog normalized counts") +
xlab("Baseline") +
scale_color_viridis("Density") +
theme_bw()
ggsave("output/scatter.baseline_otherdays.rlog.png", width = 16, height = 9)
normalized <- dds %>%
counts(normalized = TRUE) %>%
as.data.frame %>%
rownames_to_column("miRNA")
normalized %>%
mutate(across(!miRNA, round)) %>%
write_excel_csv2("output/counts.normalized.csv")
norm.means <- normalized %>%
pivot_longer(!miRNA, names_to = "Sample", values_to = "Counts") %>%
inner_join(coldata, by = "Sample") %>%
group_by(Group, Treatment, Day, miRNA) %>%
summarise(
Mean = Counts %>% mean,
SD = Counts %>% sd
)
norm.means %>%
pivot_wider(names_from = Group:Day, values_from = Mean:SD) %>%
mutate(across(!miRNA, round)) %>%
write_excel_csv2("output/counts.normalized.group.mean.csv")
norm.means %>%
slice_max(Mean, n = 100) %>%
arrange(desc(Mean)) %>%
mutate(
miRNA = miRNA %>% fct_inorder
) %>%
ggplot(aes(x = miRNA, y = Mean)) +
geom_col() +
geom_errorbar(aes(ymin = pmax(0, Mean - SD), ymax = Mean + SD)) +
facet_grid(Group + Treatment + Day ~ .) +
labs(title = "Top 100 expressed miRNAs by group", y = "Mean of normalized counts") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5))
ggsave(paste0("output/bar.counts.normalized.top-100.png"), width = 18, height = 32)
norm.means %>%
ggplot(aes(x = Treatment, y = log2(Mean + 1))) +
geom_line(aes(group = miRNA), alpha = .1) +
facet_grid(Day ~ Group, scales = "free_x")
ggsave(paste0("output/line.counts.normalized.group.mean.png"), width = 18, height = 32)
We first try a model with interactions because we assume that samples of the various conditions may respond differently in time:
design(dds)We want to know if we need to account also for subject variability, by comparing the full model with a reduced model without Subject variable.
design(dds) <- ~ 0 + Day * Condition + Subject
dds.LRT <- dds %>%
DESeq(test = "LRT", reduced = as.formula(~ 0 + Day * Condition))
res.LRT <- dds.LRT %>%
results(alpha = 0.05)
sum(res.LRT$padj < 0.05, na.rm = T)
## [1] 9
res.LRT[which(res.LRT$padj < 0.05),]
## log2 fold change (MLE): DayDay.10.ConditionIR.S961
## LRT p-value: '~ 0 + Day * Condition + Subject' vs '~ 0 + Day * Condition'
## DataFrame with 9 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## mmu-miR-146a-5p 459.124 -0.995065 0.321907 23.5380 3.11902e-05
## mmu-miR-191-5p 71134.981 0.148122 0.272329 14.4180 2.38803e-03
## mmu-miR-21a-5p 8988.864 -0.298724 0.402969 14.5226 2.27361e-03
## mmu-miR-29a-3p 20608.008 -0.511596 0.327310 18.6834 3.17863e-04
## mmu-miR-29b-3p 898.159 -0.673286 0.501558 23.3608 3.39607e-05
## mmu-miR-29c-3p 1063.985 -0.606207 0.518255 20.6095 1.26882e-04
## mmu-miR-335-5p 1105.573 -0.246726 0.480422 15.3984 1.50600e-03
## mmu-miR-421-3p 1619.383 0.246957 0.279435 14.6966 2.09512e-03
## mmu-miR-652-3p 2472.044 0.412607 0.349555 16.5402 8.78546e-04
## padj
## <numeric>
## mmu-miR-146a-5p 0.00300553
## mmu-miR-191-5p 0.04696454
## mmu-miR-21a-5p 0.04696454
## mmu-miR-29a-3p 0.01406542
## mmu-miR-29b-3p 0.00300553
## mmu-miR-29c-3p 0.00748603
## mmu-miR-335-5p 0.04442711
## mmu-miR-421-3p 0.04696454
## mmu-miR-652-3p 0.03110051
res.LRT %>%
EnhancedVolcano(
lab = rownames(.),
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.05,
FCcutoff = 0,
title = "LRT of reduced model without Subject",
subtitle = bquote(italic("Full = Day * Condition + Subject")),
caption = paste(
length(which(.$padj < 0.05)),
"out of",
nrow(.),
"features would better fit the full model"
)
)
normalized %>%
filter(miRNA %in% rownames(res.LRT[which(res.LRT$padj < 0.05),])) %>%
pivot_longer(!miRNA, names_to = "Sample", values_to = "Reads") %>%
inner_join(coldata) %>%
ggplot(aes(x = Day, y = log2(Reads + 1), color = Subject)) +
geom_line(aes(group = Mouse)) +
geom_point() +
facet_grid(miRNA ~ Condition, scales = "free") +
scale_color_brewer("Replicate", palette = "Set1", breaks = 1:4, labels = LETTERS[1:4]) +
theme_bw()
Since only 6 miRNAs would better fit a model with Subject variable, we are dropping it and continuing with the reduced model.
Now we test for interactions between Condition and Day.
design(dds) <- ~ 0 + Day * Condition
dds.LRT <- dds %>%
DESeq(test = "LRT", reduced = as.formula(~ 0 + Day + Condition))
res.LRT <- dds.LRT %>%
results(alpha = 0.05)
sum(res.LRT$padj < 0.05, na.rm = T)
## [1] 2
res.LRT[which(res.LRT$padj < 0.05),]
## log2 fold change (MLE): DayDay.10.ConditionIR.S961
## LRT p-value: '~ 0 + Day * Condition' vs '~ 0 + Day + Condition'
## DataFrame with 2 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## mmu-miR-133b-3p 50.3821 0.91367 0.981520 49.1333 3.85044e-07
## mmu-miR-202-3p 59.0982 -2.19563 0.677225 96.0565 3.34419e-16
## padj
## <numeric>
## mmu-miR-133b-3p 1.13010e-04
## mmu-miR-202-3p 1.96304e-13
res.LRT %>%
EnhancedVolcano(
lab = rownames(.),
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.05,
FCcutoff = 0,
title = "LRT of reduced model without interaction",
subtitle = bquote(italic("Full = Day * Condition")),
caption = paste(
length(which(.$padj < 0.05)),
"out of",
nrow(.),
"features would better fit the full model"
)
)
normalized %>%
filter(miRNA %in% rownames(res.LRT[which(res.LRT$padj < 0.05),])) %>%
pivot_longer(!miRNA, names_to = "Sample", values_to = "Reads") %>%
inner_join(coldata) %>%
ggplot(aes(x = Treatment, y = log2(Reads + 1))) +
geom_boxplot(aes(fill = Day)) +
facet_grid(miRNA ~ Group + Day, scales = "free") +
scale_fill_brewer("Timepoint", palette = "Reds") +
theme_bw()
normalized %>%
filter(miRNA %in% rownames(res.LRT[which(res.LRT$padj < 0.05),])) %>%
pivot_longer(!miRNA, names_to = "Sample", values_to = "Reads") %>%
inner_join(coldata) %>%
ggplot(aes(x = Day, y = log2(Reads + 1), color = Subject)) +
geom_line(aes(group = Mouse)) +
geom_point() +
facet_grid(miRNA ~ Condition, scales = "free") +
scale_color_brewer("Replicate", palette = "Set1", breaks = 1:4, labels = LETTERS[1:4]) +
theme_bw()
Since only 2 miRNAs would better fit a model with interactions, we are dropping them and continuing with the simple additive model.
Now we test for Day variable.
design(dds) <- ~ 0 + Day + Condition
dds.LRT <- dds %>%
DESeq(test = "LRT", reduced = as.formula(~ 0 + Condition))
res.LRT <- dds.LRT %>%
results(alpha = 0.05)
sum(res.LRT$padj < 0.05, na.rm = T)
## [1] 226
res.LRT[which(res.LRT$padj < 0.05),]
## log2 fold change (MLE): ConditionIR.S961
## LRT p-value: '~ 0 + Day + Condition' vs '~ 0 + Condition'
## DataFrame with 226 rows and 6 columns
## baseMean log2FoldChange lfcSE stat pvalue
## <numeric> <numeric> <numeric> <numeric> <numeric>
## mmu-let-7a-1-3p 11.48789 0.2287484 0.241160 24.57148 4.61712e-06
## mmu-let-7c-2-3p 11.44265 0.2218056 0.241200 25.54115 2.84322e-06
## mmu-let-7c-5p 88015.06837 0.1080279 0.180658 8.02082 1.81259e-02
## mmu-let-7g-3p 1.68160 -0.0941632 0.499347 12.75606 1.69846e-03
## mmu-let-7i-3p 7.55786 -0.1305544 0.262942 16.00076 3.35336e-04
## ... ... ... ... ... ...
## mmu-miR-382-3p 1.24340 0.157471 0.588243 12.5225 1.90882e-03
## mmu-miR-6238 3.94340 -0.390066 0.373632 24.5118 4.75703e-06
## mmu-miR-136-5p 2.40316 0.907838 0.500212 22.8546 1.08941e-05
## mmu-miR-31-3p 1.51419 0.969192 0.573043 30.7867 2.06421e-07
## mmu-miR-488-3p 1.37907 1.164834 0.842333 13.4657 1.19113e-03
## padj
## <numeric>
## mmu-let-7a-1-3p 5.85661e-05
## mmu-let-7c-2-3p 4.39202e-05
## mmu-let-7c-5p 4.75493e-02
## mmu-let-7g-3p 7.55302e-03
## mmu-let-7i-3p 2.05044e-03
## ... ...
## mmu-miR-382-3p 8.36178e-03
## mmu-miR-6238 5.85661e-05
## mmu-miR-136-5p 1.10256e-04
## mmu-miR-31-3p 4.32747e-06
## mmu-miR-488-3p 5.54914e-03
res.LRT %>%
EnhancedVolcano(
lab = rownames(.),
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.05,
FCcutoff = 0,
title = "LRT of reduced model without Day",
subtitle = bquote(italic("Full = Day + Condition")),
caption = paste(
length(which(.$padj < 0.05)),
"out of",
nrow(.),
"features would better fit the full model"
)
)
Since 226 out of 587 miRNAs fit better a full model accounting for timepoint/batch effect than a reduced model with Condition only, we’re keeping the variable in the design formula.
DE.test <- function(md, dds, ...) {
print(md$plotTitle)
print(md$fileName)
dds %>%
results(contrast = c("Condition", md$contrast1, md$contrast2), alpha = 0.05) %>%
lfcShrink(dds = dds, res = ., type = "ashr") %>%
{
plotMA(.)
summary(.)
.
} %>%
as.data.frame %>%
rownames_to_column("miRNA") %>%
arrange(padj, pvalue) %>%
write_excel_csv2(paste0("output/DE/DE.", md$fileName, ".csv")) %>%
mutate(
group = md$group,
test = md$fileName %>% str_remove("^[^.]*\\."),
direction = if_else(log2FoldChange > 0, "UP", "DOWN"),
.before = miRNA
) %>%
{
EnhancedVolcano(
toptable = .,
lab = .$miRNA,
x = 'log2FoldChange',
y = 'padj',
pCutoff = 0.05,
FCcutoff = 0,
title = md$plotTitle,
subtitle = bquote(italic("BH adjusted p-values")),
caption = paste(
length(which(.$padj < 0.05)),
"differentially expressed features out of",
nrow(.),
"total features"
)
)
ggsave(
paste0("output/DE/DE.", md$fileName, ".volcano.png"),
width = 16,
height = 9
)
.
} %>%
filter(padj < 0.05) %>%
{
if (nrow(.)) {
select(., miRNA, direction) %>%
inner_join(normalized, by = "miRNA") %>%
pivot_longer(!miRNA:direction, names_to = "Sample", values_to = "Reads") %>%
inner_join(coldata, by = "Sample") %>%
filter(
Condition %>% make.names %in% c(md$contrast1, md$contrast2)
) %>%
ggplot(aes(x = Condition, y = log2(Reads + 1))) +
stat_summary(aes(group = miRNA), fun = mean, geom = "line", linetype = "dashed", alpha = .5) +
geom_boxplot(aes(fill = Day), position = position_dodge2(padding = 0.2)) +
facet_wrap(~ direction + miRNA, scales = "free_y") +
ggtitle(md$plotTitle) +
#scale_fill_manual(values = brewer.pal(6, "RdYlBu")[c(5, 1)]) +
scale_fill_manual(values = brewer.pal(6, "RdYlBu")[3:1]) +
scale_x_discrete(expand = expansion(add = 0.4)) +
theme_bw()
ggsave(
paste0("output/DE/DE.", md$fileName, ".boxplot.png"),
width = 16,
height = 9
)
}
.
}
}
design(dds) <- ~ 0 + Condition + Day
de <- tibble()
dds <- dds %>%
DESeq
resultsNames(dds)
## [1] "ConditionBCA.Control" "ConditionBCA.DT.005ng" "ConditionBCA.DT.015ng"
## [4] "ConditionBCA.DT.120ng" "ConditionIR.Control" "ConditionIR.S961"
## [7] "DayDay..4" "DayDay.10"
de <- tribble(
~group, ~contrast1, ~contrast2, ~fileName, ~plotTitle,
"BCA", "BCA.DT.005ng", "BCA.Control", "BCA.DT005-vs-CTR", "BCA DT (5ng) vs Control",
"BCA", "BCA.DT.015ng", "BCA.Control", "BCA.DT015-vs-CTR", "BCA DT (15ng) vs Control",
"BCA", "BCA.DT.120ng", "BCA.Control", "BCA.DT120-vs-CTR", "BCA DT (120ng) vs Control",
"IR", "IR.S961", "IR.Control", "IR.S961-vs-CTR", "IR S961 vs Control",
) %>%
rowwise %>%
group_map(DE.test, dds = dds) %>%
bind_rows(de)
## [1] "BCA DT (5ng) vs Control"
## [1] "BCA.DT005-vs-CTR"
##
## out of 587 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 0, 0%
## LFC < 0 (down) : 1, 0.17%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## [1] "BCA DT (15ng) vs Control"
## [1] "BCA.DT015-vs-CTR"
##
## out of 587 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 15, 2.6%
## LFC < 0 (down) : 10, 1.7%
## outliers [1] : 0, 0%
## low counts [2] : 125, 21%
## (mean count < 5)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## [1] "BCA DT (120ng) vs Control"
## [1] "BCA.DT120-vs-CTR"
##
## out of 587 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 2, 0.34%
## LFC < 0 (down) : 3, 0.51%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results
## [1] "IR S961 vs Control"
## [1] "IR.S961-vs-CTR"
##
## out of 587 with nonzero total read count
## adjusted p-value < 0.05
## LFC > 0 (up) : 0, 0%
## LFC < 0 (down) : 0, 0%
## outliers [1] : 0, 0%
## low counts [2] : 0, 0%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results